This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
This Html can be accessed from http://amalrkrishna.com/.
library(sampling) #for sampling functions
library(data.table)
library(jsonlite) #reading json data
library(dplyr) #data manipulation
library(tidyr) #data manipulation
library(lubridate) #days() and hours()
library(readr)
library(UsingR)
library(plotly) #plotting
library(gepaf) #for converting polyline encoding into geographical coords
library(leaflet) #maps
library(knitr) #printing dataframes
library(kableExtra) #printing dataframes
TKeyAPIV3 <- "?api_key=29655b2934ca4235add916cb22e9b9f2"
TKeyAPIV2 <- "?api_key=DOb5b6UKM0uecKLrPskA7A"
#APIV3URLS
TRoutesURL3 <- "https://api-v3.mbta.com/routes"
TStopsURL3 <- "https://api-v3.mbta.com/stops"
TVehiclesURL3 <- "https://api-v3.mbta.com/vehicles"
TShapesURL3 <- "https://api-v3.mbta.com/shapes?route="
TSchedulesURL3 <- "https://api-v3.mbta.com/schedules?route="
TPredictionsURL3 <- "https://api-v3.mbta.com/predictions?route="
#APIV2 URLS
TRouteURL2 <- "http://realtime.mbta.com/developer/api/v2/stopsbyroute"
TTravelURL2 <- "http://realtime.mbta.com/developer/api/v2.1/traveltimes"
THeadwaysURL2 <- "http://realtime.mbta.com/developer/api/v2.1/headways"
TDwellsURL2 <- "http://realtime.mbta.com/developer/api/v2.1/dwells"
TFormat <- "&format=json"
This is the basic dataset directly downloaded from http://www.mbtabackontrack.com/performance/index.html#/download
RidData<-data.frame(read.csv("TDashboardData_ridership.csv"))
| SERVICE_MONTH | MODE_TYPE | TOTAL_WEEKDAY_RIDERSHIP_COUNT | NUMBER_OF_WEEKDAYS | AVERAGE_WEEKDAY_RIDERSHIP_COUNT | ROUTE_OR_LINE |
|---|---|---|---|---|---|
| 2015-01-01 00:00:00 | RAIL | 1154894 | 20 | 57744.68 | BLUE LINE |
| 2015-01-01 00:00:00 | BUS | 7144221 | 20 | 357211.05 | BUS |
| 2015-01-01 00:00:00 | COMMUTER RAIL | 2436317 | 20 | 121815.87 | COMMUTER RAIL |
| 2015-01-01 00:00:00 | FERRY | 51665 | 20 | 2583.26 | F1-HINGHAM-BOSTON |
| 2015-01-01 00:00:00 | FERRY | 13848 | 20 | 692.41 | F2_F2H-Quincy-Boston-Logan-Hull-Georges |
| 2015-01-01 00:00:00 | FERRY | 6070 | 20 | 303.50 | F4-BOSTON-CHARLESTOWN |
CSData<-data.frame(read.csv("TDashboardData_customersatisfaction.csv"))
| Survey.Date | Question.Group | Question.Text | Respondents | Average.Rating.Out.of.7 |
|---|---|---|---|---|
| 2016-02-01 00:00:00 | Top Line | How would you rate the mbta overall? | 519 | 4.52 |
| 2016-02-01 00:00:00 | Top Line | How would you rate this trip overall? | 502 | 4.80 |
| 2016-02-01 00:00:00 | Top Line | How likely are you to continue using the mbta in the future? | 502 | 6.37 |
| 2016-02-01 00:00:00 | Top Line | How likely are you to recommend the mbta to a friend or colleague? | 490 | 5.13 |
| 2016-02-01 00:00:00 | Perceptions | The mbta provides reliable public transportation services. | 490 | 4.04 |
| 2016-02-01 00:00:00 | Perceptions | The mbta is a cost-conscious organization. | 490 | 3.16 |
FData<-data.frame(read.csv("TDashboardData_financials.csv"))
| Reporting.Fiscal.Year | Reporting.Date | Category | Subcategory | Month.to.Date.Actual.Amount | Month.to.Date.Budgeted.Amount | Year.to.Date.Actual.Amount | Year.to.Date.Budgeted.Amount |
|---|---|---|---|---|---|---|---|
| 2015 | 2015-06-30 00:00:00 | Additional State Assistance | 8579385 | 24591674 | 125352620 | 295100000 | |
| 2015 | 2015-06-30 00:00:00 | Expenses | Debt Service | 24729468 | 32739023 | 413439712 | 423938425 |
| 2015 | 2015-06-30 00:00:00 | Expenses | Operating Expenses | 145087793 | 124309484 | 1508820862 | 1508921410 |
| 2015 | 2015-06-30 00:00:00 | Revenue | Non-Operating Revenues | 142500324 | 121956788 | 1157292600 | 1001817915 |
| 2015 | 2015-06-30 00:00:00 | Revenue | Operating Revenues | 57387316 | 56642014 | 645968041 | 646174787 |
| 2015 | 2015-06-30 00:00:00 | Transfers In | 0 | 0 | 0 | 0 |
RidershipRevExp <- plot_ly() %>%
add_bars(x=format(as.Date(unique(RidData$SERVICE_MONTH)), "%y-%m"),
y = aggregate(TOTAL_WEEKDAY_RIDERSHIP_COUNT ~ SERVICE_MONTH, RidData, sum)$TOTAL_WEEKDAY_RIDERSHIP_COUNT,name = "Ridership",yaxis="y1") %>%
add_bars(x=format(as.Date(unique(FData$Reporting.Date)), "%y-%m"), y = aggregate(Month.to.Date.Actual.Amount ~ Reporting.Date, subset(FData,Category=="Expenses"), sum)$Month.to.Date.Actual.Amount,name = "Expense",yaxis="y2") %>%
add_bars(x=format(as.Date(unique(FData$Reporting.Date)), "%y-%m"), y = aggregate(Month.to.Date.Actual.Amount ~ Reporting.Date, subset(FData,Category=="Revenue"), sum)$Month.to.Date.Actual.Amount, name = "Revenue",yaxis="y2") %>%
add_trace(x=format(as.Date(unique(CSData$Survey.Date)), "%y-%m"),y=aggregate(Average.Rating.Out.of.7 ~ Survey.Date, CSData, mean)$Average.Rating.Out.of.7, name = "Customer Satisfaction", type="scatter", mode="lines",yaxis="y3") %>%
subplot(nrows = 3, shareX = TRUE)
The Ridership stays high during March to October and usually decreases during the winter (December - April)
The gap between the expense and the revenue has been closing down in 2017 unlike 2 years back.
RoutesComplete<-fromJSON(paste(TRoutesURL3))$data
#[1:8] pickout only the subway list
RoutesList<-unique(subset(RoutesComplete, RoutesComplete$attributes$description == "Rapid Transit")$id)[1:8]
Routes data (RoutesComplete)
| type | id | links.self | attributes.type | attributes.sort_order | attributes.short_name | attributes.long_name | attributes.direction_names | attributes.description |
|---|---|---|---|---|---|---|---|---|
| route | Red | /routes/Red | 1 | 1 | Red Line | Southbound, Northbound | Rapid Transit | |
| route | Orange | /routes/Orange | 1 | 2 | Orange Line | Southbound, Northbound | Rapid Transit | |
| route | Green-B | /routes/Green-B | 0 | 3 | B | Green Line B | Westbound, Eastbound | Rapid Transit |
| route | Green-C | /routes/Green-C | 0 | 4 | C | Green Line C | Westbound, Eastbound | Rapid Transit |
| route | Green-D | /routes/Green-D | 0 | 5 | D | Green Line D | Westbound, Eastbound | Rapid Transit |
| route | Green-E | /routes/Green-E | 0 | 6 | E | Green Line E | Westbound, Eastbound | Rapid Transit |
RidershipByMode<-aggregate(TOTAL_WEEKDAY_RIDERSHIP_COUNT ~ MODE_TYPE,RidData,sum)
RidershipByModePie <- plot_ly(RidershipByMode, labels = ~MODE_TYPE, values = ~TOTAL_WEEKDAY_RIDERSHIP_COUNT, type = 'pie',textposition = 'inside', textinfo = 'label+percent') %>%
layout(title = 'Total ridership by mode of transport', showlegend = FALSE)
RidershipByRoute<-aggregate(TOTAL_WEEKDAY_RIDERSHIP_COUNT ~ ROUTE_OR_LINE ,RidData,mean)
RidershipByRouteBar <- plot_ly(RidershipByRoute, x = ~ROUTE_OR_LINE, y = ~TOTAL_WEEKDAY_RIDERSHIP_COUNT, lables = ~ROUTE_OR_LINE, color = ~ROUTE_OR_LINE, type = 'bar') %>%
layout(title = 'Total weekday ridership in a month', yaxis=list(title='Ridership'), xaxis=list(title=''), showlegend = FALSE, margin=list(b=100))
NumOfRoutesByType <- plot_ly(
x = names(table(RoutesComplete$attributes$description)),
y = as.numeric(table(RoutesComplete$attributes$description)),
type = "bar", color=names(table(RoutesComplete$attributes$description))) %>%
layout(title = 'Number of routes in the MBTA', yaxis = list(title = "Frequency"), margin=list(b=75))
if(file.exists("SchedulesComplete.rds")) {
SchedulesComplete <- readRDS("SchedulesComplete.rds")
} else {
SchedulesComplete <- data.frame(Date=as.Date(character()),
File=character(),
User=character(),
stringsAsFactors=FALSE)
for(i in RoutesList) {
SchedulesComplete<-rbind(SchedulesComplete, data.frame(fromJSON(paste(TSchedulesURL3,i,sep=""), flatten = TRUE)))
}
saveRDS(SchedulesComplete, "SchedulesComplete.rds")
}
Schedule data (SchedulesComplete)
## version data.type data.id
## 1 1.0 schedule schedule-34708374-70061-1
## 2 1.0 schedule schedule-34708374-70063-10
## 3 1.0 schedule schedule-34708374-70065-20
## 4 1.0 schedule schedule-34708374-70067-30
## 5 1.0 schedule schedule-34708374-70069-40
## 6 1.0 schedule schedule-34708374-70071-50
## data.relationships.trip.data.type data.relationships.trip.data.id
## 1 trip 34708374
## 2 trip 34708374
## 3 trip 34708374
## 4 trip 34708374
## 5 trip 34708374
## 6 trip 34708374
## data.relationships.stop.data.type data.relationships.stop.data.id
## 1 stop 70061
## 2 stop 70063
## 3 stop 70065
## 4 stop 70067
## 5 stop 70069
## 6 stop 70071
## data.relationships.route.data.type data.relationships.route.data.id
## 1 route Red
## 2 route Red
## 3 route Red
## 4 route Red
## 5 route Red
## 6 route Red
## data.attributes.timepoint data.attributes.stop_sequence
## 1 FALSE 1
## 2 FALSE 10
## 3 FALSE 20
## 4 FALSE 30
## 5 FALSE 40
## 6 FALSE 50
## data.attributes.pickup_type data.attributes.drop_off_type
## 1 0 1
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## data.attributes.departure_time data.attributes.arrival_time
## 1 2017-12-09T07:37:00-05:00 2017-12-09T07:37:00-05:00
## 2 2017-12-09T07:39:00-05:00 2017-12-09T07:39:00-05:00
## 3 2017-12-09T07:41:00-05:00 2017-12-09T07:41:00-05:00
## 4 2017-12-09T07:44:00-05:00 2017-12-09T07:44:00-05:00
## 5 2017-12-09T07:47:00-05:00 2017-12-09T07:47:00-05:00
## 6 2017-12-09T07:50:00-05:00 2017-12-09T07:50:00-05:00
NumOfTripsByType <- plot_ly(x = names(table(SchedulesComplete$data.relationships.route.data.id)), y = as.numeric(table(SchedulesComplete$data.relationships.route.data.id)), type = "bar", color = names(table(SchedulesComplete$data.relationships.route.data.id))) %>%
layout(title = "Number of Scheduled Trips in a day (Saturday)", yaxis = list(title = "Frequency"))
Code available in RMarkdown source
Green-B Line route (GreenBLineRoute)
## stop_order stop_id stop_name
## 1 1 70210 Lechmere - Inbound
## 2 10 70208 Science Park - Inbound
## 3 20 70206 North Station - Green Line Inbound
## 4 30 70204 Haymarket - Green Line Inbound
## 5 40 70202 Government Center - Green Line Westbound
## 6 50 70196 Park Street - Green Line - B Branch Berth
## parent_station parent_station_name stop_lat stop_lon
## 1 place-lech Lechmere 42.370772 -71.076536
## 2 place-spmnl Science Park 42.366664 -71.067666
## 3 place-north North Station 42.365577 -71.06129
## 4 place-haecl Haymarket 42.363021 -71.05829
## 5 place-gover Government Center 42.359705 -71.059215
## 6 place-pktrm Park Street 42.35639457 -71.0624242
##### The start date and end date for the APIV2/APIV3 data - currently set to one week data from Dec 1, 4 am to Dec 8, 4 am.
startTime <- as.POSIXct("2017-12-01 04:00:00")
endTime <- as.POSIXct("2017-12-08 04:00:00")
numWeeks <- as.integer(unclass(endTime - startTime)/7)
fromTime <- paste("&from_datetime=",as.numeric(startTime), sep="")
toTime <- paste("&to_datetime=",as.numeric(endTime), sep="")
Code available in RMarkdown Source
GreenLineBTravelTimes provides the travel times from each station to the nearest two stations within the same line. For Sampling I took 100 samples from the CombinedAllstonBUEast data which generates the travel times from Allston St to BU East from the processed. The processing steps looping through each station is hidden within the RMarkdown.
set.seed(123)
#Glimpse of GreenLineBTravelTimes
head(GreenLineBTravelTimes)
## from_stop from_name from_lat from_lon to_stop to_name
## 1 70208 Science Park 42.366664 -71.067666 70206 North Station
## 2 70206 North Station 42.365577 -71.06129 70204 Haymarket
## 3 70204 Haymarket 42.363021 -71.05829 70202 Government Center
## 4 70208 Science Park 42.366664 -71.067666 70206 North Station
## 5 70159 Boylston 42.35302 -71.06459 70157 Arlington
## 6 70206 North Station 42.365577 -71.06129 70204 Haymarket
## to_lat to_lon direction dep_dt arr_dt
## 1 42.365577 -71.06129 0 2017-12-01 05:04:27 2017-12-01 05:06:54
## 2 42.363021 -71.05829 0 2017-12-01 05:07:38 2017-12-01 05:08:33
## 3 42.359705 -71.059215 0 2017-12-01 05:09:30 2017-12-01 05:10:36
## 4 42.365577 -71.06129 0 2017-12-01 05:12:32 2017-12-01 05:15:27
## 5 42.351902 -71.070893 0 2017-12-01 05:15:44 2017-12-01 05:17:33
## 6 42.363021 -71.05829 0 2017-12-01 05:16:00 2017-12-01 05:17:00
## travel_time_sec benchmark_travel_time_sec dep_d dep_t arr_d
## 1 147 180 2017-12-01 05:04:27 2017-12-01
## 2 55 120 2017-12-01 05:07:38 2017-12-01
## 3 66 120 2017-12-01 05:09:30 2017-12-01
## 4 175 180 2017-12-01 05:12:32 2017-12-01
## 5 109 120 2017-12-01 05:15:44 2017-12-01
## 6 60 120 2017-12-01 05:16:00 2017-12-01
## arr_t
## 1 05:06:54
## 2 05:08:33
## 3 05:10:36
## 4 05:15:27
## 5 05:17:33
## 6 05:17:00
#Number of rows in GreenLineBTravelTimes
nrow(GreenLineBTravelTimes)
## [1] 82681
SamplingPlot <- plot_ly(x=GreenLineBTravelTimes$travel_time_sec, type="histogram", name="All GL stop pairs") %>%
layout(title="Green Line B travel time distribution", xaxis=list(range=c(0,200) , yaxis=list(title="Frequency")))
AllstonToBUEast<-fromJSON(paste(TTravelURL2, TKeyAPIV2, TFormat, "&from_stop=", 70126, "&to_stop=", 70146, fromTime, toTime, sep=""))[[1]][c(1:5)]
BUEastToAllston<-fromJSON(paste(TTravelURL2, TKeyAPIV2, TFormat, "&from_stop=", 70146, "&to_stop=", 70126, fromTime, toTime, sep=""))[[1]][c(1:5)]
CombinedAllstonBUEast<-rbind(AllstonToBUEast,BUEastToAllston)
CombinedAllstonBUEast$travel_time_sec<-as.numeric(CombinedAllstonBUEast$travel_time_sec)
head(CombinedAllstonBUEast)
## route_id direction dep_dt arr_dt travel_time_sec
## 1 Green-B 1 1512123340 1512124110 770
## 2 Green-B 1 1512123898 1512124613 715
## 3 Green-B 1 1512124507 1512125403 896
## 4 Green-B 1 1512125315 1512126156 841
## 5 Green-B 1 1512125940 1512126673 733
## 6 Green-B 1 1512126526 1512127434 908
SamplingPlot1 <- plot_ly(x=as.numeric(CombinedAllstonBUEast$travel_time_sec), type="histogram", name="Allston -> BU East") %>%
layout(title="Travel time distribution", xaxis=list(range=c(300,1700)), yaxis=list(title="Frequency"))
GreenLineBSubset<-aggregate(travel_time_sec ~ ., GreenLineBTravelTimes[c(2,6,9,12)], mean)
GreenLineBSubset$travel_time_sec<-as.numeric(GreenLineBSubset$travel_time_sec)
head(GreenLineBSubset)
## from_name to_name direction travel_time_sec
## 1 Griggs Street Allston Street 0 19.95825
## 2 Boylston Arlington 0 94.59102
## 3 Pleasant Street Babcock Street 0 42.86942
## 4 Kenmore Blandford Street 0 39.78655
## 5 Boston Univ. East Boston Univ. Central 0 41.92593
## 6 Blandford Street Boston Univ. East 0 65.59015
fit.subset <- density(GreenLineBSubset$travel_time_sec)
fit.allston.bu <- density(CombinedAllstonBUEast$travel_time_sec)
SamplingPlot2 <- plot_ly(x = GreenLineBSubset$travel_time_sec, type = "histogram", name="Histogram") %>%
add_trace(x = fit.subset$x, y = fit.subset$y, type = "scatter", mode = "lines", yaxis = "y2", name = "Density") %>%
layout(title="Green-B travel time (Aggregate)", yaxis=list(title="Frequency"), yaxis2 = list(overlaying = "y", side = "right"))
SamplingPlot2.1 <- plot_ly(x = CombinedAllstonBUEast$travel_time_sec, type = "histogram", name="Histogram") %>%
add_trace(x = fit.allston.bu$x, y = fit.allston.bu$y, type = "scatter", mode = "lines", yaxis = "y2", name = "Density") %>%
layout(title="Allston St -> BU East time", xaxis=list(range=c(300,1700)), yaxis=list(title="Frequency"), yaxis2 = list(overlaying = "y", side = "right"))
#Sampling with 1000 samples with sample size 5
samples.5<-1000
sample.size.5<-5
xbar.5<-numeric(samples.5)
meanGL.5<-mean(CombinedAllstonBUEast$travel_time_sec)
sdGL.5<-sd(CombinedAllstonBUEast$travel_time_sec)
for (i in 1: samples.5) {
xbar.5[i] <- mean(rnorm(sample.size.5,
meanGL.5, sdGL.5))
}
SamplingPlot3 <- plot_ly(x = xbar.5, type = "histogram", histnorm = "probability", name="Size = 5") %>%
layout(title="Sample size 5", xaxis=list(range=c(400,1600)))
#Sampling with 1000 samples with sample size 10
samples.10<-1000
sample.size.10<-10
xbar.10<-numeric(samples.10)
meanGL.10<-mean(CombinedAllstonBUEast$travel_time_sec)
sdGL.10<-sd(CombinedAllstonBUEast$travel_time_sec)
for (i in 1: samples.10) {
xbar.10[i] <- mean(rnorm(sample.size.10,
meanGL.10, sdGL.10))
}
SamplingPlot4 <- plot_ly(x = xbar.10, type = "histogram", histnorm = "probability",name="Size = 10") %>%
layout(title="Sample size 10", xaxis=list(range=c(400,1600)))
#Sampling with 1000 samples with sample size 15
samples.15<-1000
sample.size.15<-15
xbar.15<-numeric(samples.15)
meanGL.15<-mean(CombinedAllstonBUEast$travel_time_sec)
sdGL.15<-sd(CombinedAllstonBUEast$travel_time_sec)
for (i in 1: samples.15) {
xbar.15[i] <- mean(rnorm(sample.size.15,
meanGL.15, sdGL.15))
}
SamplingPlot5 <- plot_ly(x = xbar.15, type = "histogram", histnorm = "probability",name="Size = 15") %>%
layout(title="Sample size 15", xaxis=list(range=c(400,1600)))
#Sampling with 1000 samples with sample size 20
samples.20<-1000
sample.size.20<-20
xbar.20<-numeric(samples.20)
meanGL.20<-mean(CombinedAllstonBUEast$travel_time_sec)
sdGL.20<-sd(CombinedAllstonBUEast$travel_time_sec)
for (i in 1: samples.20) {
xbar.20[i] <- mean(rnorm(sample.size.20,
meanGL.20, sdGL.20))
}
SamplingPlot6 <- plot_ly(x = xbar.20, type = "histogram", histnorm = "probability" ,name="Size = 20") %>%
layout(title="Sample size 20", xaxis=list(range=c(400,1600)))
As the sample size increases, the standard deviation of the means decreases
#SRSWR
s <- srswr(100, nrow(CombinedAllstonBUEast))
rows <- (1:nrow(CombinedAllstonBUEast))[s!=0]
rows <- rep(rows, s[s != 0])
sample.1 <- CombinedAllstonBUEast[rows, ]
fit.sample.1<-density(sample.1$travel_time_sec)
SamplingPlot7 <- plot_ly(x=sample.1$travel_time_sec, type="histogram", name="SRSWR") %>%
add_trace(x = fit.sample.1$x, y = fit.sample.1$y, type = "scatter", fill = "tozeroy", mode = "lines", yaxis = "y2", name = "Density") %>%
layout(title="Simple Random Sampling : Allston St -> BU East travel Time distribution", xaxis=list(range=c(300,1700)), yaxis2 = list(overlaying = "y", side = "right"))
#SRSWOR
s <- srswor(100,nrow(CombinedAllstonBUEast))
sample.2 <- CombinedAllstonBUEast[s != 0, ]
fit.sample.2=density(sample.2$travel_time_sec)
SamplingPlot8 <- plot_ly(x=sample.2$travel_time_sec, type="histogram", name="SRSWOR") %>%
add_trace(x = fit.sample.2$x, y = fit.sample.2$y, type = "scatter", fill = "tozeroy", mode = "lines", yaxis = "y2", name = "Density") %>%
layout(title="Simple Random Sampling : Allston St -> BU East travel Time distribution", xaxis=list(range=c(300,1700)), yaxis2 = list(overlaying = "y", side = "right"))
#Systematic Sampling
N <- nrow(CombinedAllstonBUEast)
n <- 100
k <- ceiling(N / n)
r <- sample(k, 1)
# select every kth item
s <- seq(r, by = k, length = n)
sample.3 <- CombinedAllstonBUEast[s, ]
sample.3<-na.omit(sample.3)
fit.sample.3<-density(sample.3$travel_time_sec)
SamplingPlot9 <- plot_ly(x=sample.3$travel_time_sec, type="histogram", name="Systematic") %>%
add_trace(x = fit.sample.3$x, y = fit.sample.3$y, type = "scatter", fill = "tozeroy", mode = "lines", yaxis = "y2", name = "Density") %>%
layout(title="Systematic Sampling : Allston St -> BU East travel Time distribution", xaxis=list(range=c(300,1700)), yaxis2 = list(overlaying = "y", side = "right"))
fit.org.data = density(CombinedAllstonBUEast$travel_time_sec)
SamplingPlot10 <- plot_ly(x=as.numeric(CombinedAllstonBUEast$travel_time_sec), type = "histogram", name = "Histogram") %>%
add_trace(x = fit.org.data$x, y = fit.org.data$y, type = "scatter", mode = "lines", yaxis = "y2", name = "Original") %>%
add_trace(x = fit.sample.1$x, y = fit.sample.1$y, type = "scatter", mode = "lines", yaxis = "y2", name = "SRSWR") %>%
add_trace(x = fit.sample.2$x, y = fit.sample.2$y, type = "scatter", mode = "lines", yaxis = "y2", name = "SRSWOR") %>%
add_trace(x = fit.sample.3$x, y = fit.sample.3$y, type = "scatter", mode = "lines", yaxis = "y2", name = "Systematic")%>%
layout(title="Sampling methods combined - Allston St -> BU East travel time", xaxis=list(range=c(300,1700)), yaxis2 = list(overlaying = "y", side = "right"))
conf<-c(80,90)
alpha<- 1-conf/100
#For sample size 10
sample.size.10<-10
sd.sample.means.10<-sd(as.numeric(CombinedAllstonBUEast$travel_time_sec))/sqrt(sample.size.10)
sample.data.10<-sample(as.numeric(CombinedAllstonBUEast$travel_time_sec), size=sample.size.10)
xbar.10<-mean(sample.data.10)
confdf.10 <- data.frame(Date=as.Date(character()),
File=character(),
User=character(),
stringsAsFactors=FALSE)
for(i in alpha) {
cf.10<-c(100*(1-i),xbar.10-qnorm(1-i/2)*sd.sample.means.10,xbar.10+qnorm(1-i/2)*sd.sample.means.10)
confdf.10<-rbind(confdf.10,cf.10)
str<-sprintf("%2d%% Conf Level (alpha=%.2f), CI=%.2f-%.2f",100*(1-i),i,xbar.10-qnorm(1-i/2)*sd.sample.means.10,xbar.10+qnorm(1-i/2)*sd.sample.means.10)
cat(str,"\n")
}
## 80% Conf Level (alpha=0.20), CI=818.63-951.57
## 90% Conf Level (alpha=0.10), CI=799.78-970.42
colnames(confdf.10)<-c("conf","ll","ul")
fit.sample.conf.10<-density(sample.data.10)
ConfidencePlot1 <- plot_ly(x=sample.data.10, type="histogram", name="Sampled Data") %>%
add_trace(x = fit.sample.conf.10$x, y = fit.sample.conf.10$y, type = "scatter", fill = "tozeroy", mode = "lines", yaxis = "y2", name = "Density") %>%
add_trace(x = rep(mean(CombinedAllstonBUEast$travel_time_sec),2), y = c(0,25) , type = "scatter", mode = "lines", name = "Mean") %>%
add_trace(x = c(confdf.10$ll[1],confdf.10$ll[1],confdf.10$ul[1],confdf.10$ul[1]), y = c(0,25,25,0) , type = "scatter", fill = "tozeroy", mode = "lines", name = "80% conf") %>%
add_trace(x = c(confdf.10$ll[2],confdf.10$ll[2],confdf.10$ul[2],confdf.10$ul[2]), y = c(0,25,25,0) , type = "scatter", fill = "tozeroy", mode = "lines", name = "90% conf") %>%
add_trace(x = rep(mean(CombinedAllstonBUEast$travel_time_sec),2), y = c(0,25) , type = "scatter", color = "red", mode = "lines", name = "Mean") %>%
layout(title="Confidence Interval - Sample Size 10 : Allston St -> BU East travel Time distribution", xaxis=list(range=c(300,1700)), yaxis2 = list(overlaying = "y", side = "right"))
#For Sample size 50
sample.size.50<-50
sd.sample.means.50<-sd(as.numeric(CombinedAllstonBUEast$travel_time_sec))/sqrt(sample.size.50)
sample.data.50<-sample(as.numeric(CombinedAllstonBUEast$travel_time_sec), size=sample.size.50)
xbar.50<-mean(sample.data.50)
confdf.50 <- data.frame(Date=as.Date(character()),
File=character(),
User=character(),
stringsAsFactors=FALSE)
for(i in alpha) {
cf.50<-c(100*(1-i),xbar.50-qnorm(1-i/2)*sd.sample.means.50,xbar.50+qnorm(1-i/2)*sd.sample.means.50)
confdf.50<-rbind(confdf.50,cf.50)
str<-sprintf("%2d%% Conf Level (alpha=%.2f), CI=%.2f-%.2f",100*(1-i),i,xbar.50-qnorm(1-i/2)*sd.sample.means.50,xbar.50+qnorm(1-i/2)*sd.sample.means.50)
cat(str,"\n")
}
## 80% Conf Level (alpha=0.20), CI=871.83-931.29
## 90% Conf Level (alpha=0.10), CI=863.40-939.72
colnames(confdf.50)<-c("conf","ll","ul")
fit.sample.conf.50<-density(sample.data.50)
ConfidencePlot2 <- plot_ly(x=sample.data.50, type="histogram", name="Sampled Data") %>%
add_trace(x = fit.sample.conf.50$x, y = fit.sample.conf.50$y, type = "scatter", fill = "tozeroy", mode = "lines", yaxis = "y2", name = "Density") %>%
add_trace(x = rep(mean(CombinedAllstonBUEast$travel_time_sec),2), y = c(0,25) , type = "scatter", mode = "lines", name = "Mean") %>%
add_trace(x = c(confdf.50$ll[1],confdf.50$ll[1],confdf.50$ul[1],confdf.50$ul[1]), y = c(0,25,25,0) , type = "scatter", fill = "tozeroy", mode = "lines", name = "80% conf") %>%
add_trace(x = c(confdf.50$ll[2],confdf.50$ll[2],confdf.50$ul[2],confdf.50$ul[2]), y = c(0,25,25,0) , type = "scatter", fill = "tozeroy", mode = "lines", name = "90% conf") %>%
layout(title="Confidence Interval - Sample Size 50 : Allston St -> BU East travel Time distribution", xaxis=list(range=c(300,1700)), yaxis2 = list(overlaying = "y", side = "right"))
#For Sample size 75
sample.size.75<-75
sd.sample.means.75<-sd(as.numeric(CombinedAllstonBUEast$travel_time_sec))/sqrt(sample.size.75)
sample.data.75<-sample(as.numeric(CombinedAllstonBUEast$travel_time_sec), size=sample.size.75)
xbar.75<-mean(sample.data.75)
confdf.75 <- data.frame(Date=as.Date(character()),
File=character(),
User=character(),
stringsAsFactors=FALSE)
for(i in alpha) {
cf.75<-c(100*(1-i),xbar.75-qnorm(1-i/2)*sd.sample.means.75,xbar.75+qnorm(1-i/2)*sd.sample.means.75)
confdf.75<-rbind(confdf.75,cf.75)
str<-sprintf("%2d%% Conf Level (alpha=%.2f), CI=%.2f-%.2f",100*(1-i),i,xbar.75-qnorm(1-i/2)*sd.sample.means.75,xbar.75+qnorm(1-i/2)*sd.sample.means.75)
cat(str,"\n")
}
## 80% Conf Level (alpha=0.20), CI=895.43-943.98
## 90% Conf Level (alpha=0.10), CI=888.55-950.86
colnames(confdf.75)<-c("conf","ll","ul")
fit.sample.conf.75<-density(sample.data.75)
ConfidencePlot3 <- plot_ly(x=sample.data.75, type="histogram", name="Sampled Data") %>%
add_trace(x = fit.sample.conf.75$x, y = fit.sample.conf.75$y, type = "scatter", fill = "tozeroy", mode = "lines", yaxis = "y2", name = "Density") %>%
add_trace(x = rep(mean(CombinedAllstonBUEast$travel_time_sec),2), y = c(0,25) , type = "scatter", mode = "lines", name = "Mean") %>%
add_trace(x = c(confdf.75$ll[1],confdf.75$ll[1],confdf.75$ul[1],confdf.75$ul[1]), y = c(0,25,25,0) , type = "scatter", fill = "tozeroy", mode = "lines", name = "80% conf") %>%
add_trace(x = c(confdf.75$ll[2],confdf.75$ll[2],confdf.75$ul[2],confdf.75$ul[2]), y = c(0,25,25,0) , type = "scatter", fill = "tozeroy", mode = "lines", name = "90% conf") %>%
layout(title="Confidence Interval - Sample Size 75 : Allston St -> BU East travel Time distribution", xaxis=list(range=c(300,1700)), yaxis2 = list(overlaying = "y", side = "right"))
Travel times provides the travel times from each station to the nearest two stations within the same line.
head(RedLineTravelTimes)
## from_stop from_name from_lat from_lon to_stop to_name to_lat
## 1 70063 Davis 42.39674 -71.121815 70065 Porter 42.3884
## 2 70065 Porter 42.3884 -71.119149 70067 Harvard 42.373362
## 3 70067 Harvard 42.373362 -71.118956 70069 Central 42.365486
## 4 70063 Davis 42.39674 -71.121815 70065 Porter 42.3884
## 5 70069 Central 42.365486 -71.103802 70071 Kendall/MIT 42.36249079
## 6 70065 Porter 42.3884 -71.119149 70067 Harvard 42.373362
## to_lon direction dep_dt arr_dt
## 1 -71.119149 0 2017-12-01 05:21:13 2017-12-01 05:22:28
## 2 -71.118956 0 2017-12-01 05:23:22 2017-12-01 05:25:11
## 3 -71.103802 0 2017-12-01 05:26:10 2017-12-01 05:29:00
## 4 -71.119149 0 2017-12-01 05:28:58 2017-12-01 05:30:08
## 5 -71.08617653 0 2017-12-01 05:29:58 2017-12-01 05:31:37
## 6 -71.118956 0 2017-12-01 05:30:59 2017-12-01 05:32:44
## travel_time_sec benchmark_travel_time_sec dep_d dep_t arr_d
## 1 75 120 2017-12-01 05:21:13 2017-12-01
## 2 109 180 2017-12-01 05:23:22 2017-12-01
## 3 170 180 2017-12-01 05:26:10 2017-12-01
## 4 70 120 2017-12-01 05:28:58 2017-12-01
## 5 99 180 2017-12-01 05:29:58 2017-12-01
## 6 105 180 2017-12-01 05:30:59 2017-12-01
## arr_t
## 1 05:22:28
## 2 05:25:11
## 3 05:29:00
## 4 05:30:08
## 5 05:31:37
## 6 05:32:44
RedLineTrvTimeBoxPlot <- plot_ly(y=as.numeric(RedLineTravelTimes$travel_time_sec), color = RedLineTravelTimes$from_name, type = "box") %>%
layout(title="Box plot of Red Line Travel Times", yaxis=list(title="Time",range=c(0,1000)))
GreenBLineTrvTimeBoxPlot <- plot_ly(y=as.numeric(GreenLineBTravelTimes$travel_time_sec), color = GreenLineBTravelTimes$from_name, type = "box") %>%
layout(title="Box plot of Green-B line travel times", yaxis=list(title="Time",range=c(0,1000)))
GreenCLineTrvTimeBoxPlot <- plot_ly(y=as.numeric(GreenLineCTravelTimes$travel_time_sec), color = GreenLineCTravelTimes$from_name, type = "box") %>%
layout(title="Box plot of Green-C line travel times", yaxis=list(title="Time",range=c(0,1000)))
GreenDLineTrvTimeBoxPlot <- plot_ly(y=as.numeric(GreenLineDTravelTimes$travel_time_sec), color = GreenLineDTravelTimes$from_name, type = "box") %>%
layout(title="Box plot of Green-D line travel times", yaxis=list(title="Time",range=c(0,1000)))
GreenELineTrvTimeBoxPlot <- plot_ly(y=as.numeric(GreenLineETravelTimes$travel_time_sec), color = GreenLineETravelTimes$from_name, type = "box") %>%
layout(title="Box plot of Green-E line travel times", yaxis=list(title="Time",range=c(0,1000)))
OrangeLineTrvTimeBoxPlot <- plot_ly(y=as.numeric(OrangeTravelTimes$travel_time_sec), color = OrangeTravelTimes$from_name, type = "box") %>%
layout(title="Box plot of Orange line travel times", yaxis=list(title="Time",range=c(0,1000)))
BlueLineTrvTimeBoxPlot <- plot_ly(y=as.numeric(BlueLineTravelTimes$travel_time_sec), color = BlueLineTravelTimes$from_name, type = "box") %>%
layout(title="Box plot of Blue line travel times", yaxis=list(title="Time",range=c(0,1000)))
RLdensity <- with(RedLineTravelTimes %>% filter(direction == 1), tapply(travel_time_sec, INDEX = from_name, density))
RLdf <- data.frame(
x = unlist(lapply(RLdensity, "[[", "x")),
y = unlist(lapply(RLdensity, "[[", "y")),
from_name = rep(names(RLdensity), each = length(RLdensity[[1]]$x))
)
RedLineDensityPlotN <- plot_ly(RLdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(title="Density Plot for Travel times for MBTA RL trains by departing station", yaxis=list(title="Density",range=c(0,0.4)), xaxis=list(title="NorthBound",range = c(0,600)))
RLdensity <- with(RedLineTravelTimes %>% filter(direction == 0), tapply(travel_time_sec, INDEX = from_name, density))
RLdf <- data.frame(
x = unlist(lapply(RLdensity, "[[", "x")),
y = unlist(lapply(RLdensity, "[[", "y")),
from_name = rep(names(RLdensity), each = length(RLdensity[[1]]$x))
)
RedLineDensityPlotS <- plot_ly(RLdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(yaxis=list(title="Density",range=c(0,0.4)), xaxis=list(title="SouthBound",range = c(0,600)))
#Green-B
GLBdensity <- with(GreenLineBTravelTimes %>% filter(direction == 1), tapply(travel_time_sec, INDEX = from_name, density))
GLBdf <- data.frame(
x = unlist(lapply(GLBdensity, "[[", "x")),
y = unlist(lapply(GLBdensity, "[[", "y")),
from_name = rep(names(GLBdensity), each = length(GLBdensity[[1]]$x))
)
GreenLineBDensityPlotE <- plot_ly(GLBdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(title="Density Plot for Travel times for MBTA GL trains by departing station", yaxis=list(title="Density",range=c(0,0.25)), xaxis=list(title="EastBound",range = c(0,600)))
GLBdensity <- with(GreenLineBTravelTimes %>% filter(direction == 0), tapply(travel_time_sec, INDEX = from_name, density))
GLBdf <- data.frame(
x = unlist(lapply(GLBdensity, "[[", "x")),
y = unlist(lapply(GLBdensity, "[[", "y")),
from_name = rep(names(GLBdensity), each = length(GLBdensity[[1]]$x))
)
GreenLineBDensityPlotW <- plot_ly(GLBdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(yaxis=list(title="Density",range=c(0,0.25)), xaxis=list(title="West Bound",range = c(0,600)))
#Green-C
GLCdensity <- with(GreenLineCTravelTimes %>% filter(direction == 1), tapply(travel_time_sec, INDEX = from_name, density))
GLCdf <- data.frame(
x = unlist(lapply(GLCdensity, "[[", "x")),
y = unlist(lapply(GLCdensity, "[[", "y")),
from_name = rep(names(GLCdensity), each = length(GLCdensity[[1]]$x))
)
GreenLineCDensityPlotE <- plot_ly(GLBdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(title="Density Plot for Green-C travel times trains by departing station", yaxis=list(title="Density",range=c(0,0.25)), xaxis=list(title="EastBound",range = c(0,600)))
GLCdensity <- with(GreenLineCTravelTimes %>% filter(direction == 0), tapply(travel_time_sec, INDEX = from_name, density))
GLCdf <- data.frame(
x = unlist(lapply(GLCdensity, "[[", "x")),
y = unlist(lapply(GLCdensity, "[[", "y")),
from_name = rep(names(GLCdensity), each = length(GLCdensity[[1]]$x))
)
GreenLineCDensityPlotW <- plot_ly(GLCdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(yaxis=list(title="Density",range=c(0,0.25)), xaxis=list(title="West Bound",range = c(0,600)))
#Green-D
GLDdensity <- with(GreenLineDTravelTimes %>% filter(direction == 1), tapply(travel_time_sec, INDEX = from_name, density))
GLDdf <- data.frame(
x = unlist(lapply(GLDdensity, "[[", "x")),
y = unlist(lapply(GLDdensity, "[[", "y")),
from_name = rep(names(GLDdensity), each = length(GLDdensity[[1]]$x))
)
GreenLineDDensityPlotE <- plot_ly(GLDdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(title="Density Plot for Green-D travel times trains by departing station", yaxis=list(title="Density",range=c(0,0.25)), xaxis=list(title="EastBound",range = c(0,600)))
GLDdensity <- with(GreenLineDTravelTimes %>% filter(direction == 0), tapply(travel_time_sec, INDEX = from_name, density))
GLDdf <- data.frame(
x = unlist(lapply(GLDdensity, "[[", "x")),
y = unlist(lapply(GLDdensity, "[[", "y")),
from_name = rep(names(GLDdensity), each = length(GLDdensity[[1]]$x))
)
GreenLineDDensityPlotW <- plot_ly(GLDdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(yaxis=list(title="Density",range=c(0,0.25)), xaxis=list(title="West Bound",range = c(0,600)))
#Green-E
GLEdensity <- with(GreenLineETravelTimes %>% filter(direction == 1), tapply(travel_time_sec, INDEX = from_name, density))
GLEdf <- data.frame(
x = unlist(lapply(GLEdensity, "[[", "x")),
y = unlist(lapply(GLEdensity, "[[", "y")),
from_name = rep(names(GLEdensity), each = length(GLEdensity[[1]]$x))
)
GreenLineEDensityPlotE <- plot_ly(GLEdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(title="Density Plot for Green-E travel times trains by departing station", yaxis=list(title="Density",range=c(0,0.25)), xaxis=list(title="EastBound",range = c(0,600)))
GLEdensity <- with(GreenLineETravelTimes %>% filter(direction == 0), tapply(travel_time_sec, INDEX = from_name, density))
GLEdf <- data.frame(
x = unlist(lapply(GLEdensity, "[[", "x")),
y = unlist(lapply(GLEdensity, "[[", "y")),
from_name = rep(names(GLEdensity), each = length(GLEdensity[[1]]$x))
)
GreenLineEDensityPlotW <- plot_ly(GLEdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(yaxis=list(title="Density",range=c(0,0.25)), xaxis=list(title="West Bound",range = c(0,600)))### Generate Density plot for #
#Travel Times for Orange Line with Plotly
OLdensity <- with(OrangeTravelTimes %>% filter(direction == 1), tapply(travel_time_sec, INDEX = from_name, density))
OLdf <- data.frame(
x = unlist(lapply(OLdensity, "[[", "x")),
y = unlist(lapply(OLdensity, "[[", "y")),
from_name = rep(names(OLdensity), each = length(OLdensity[[1]]$x))
)
OrangeLineDensityPlotN <- plot_ly(OLdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(title="Density Plot for Travel times for MBTA OL trains by departing station", yaxis=list(title="Density",range=c(0,0.4)), xaxis=list(title="NorthBound",range = c(0,600)))
OLdensity <- with(OrangeTravelTimes %>% filter(direction == 0), tapply(travel_time_sec, INDEX = from_name, density))
OLdf <- data.frame(
x = unlist(lapply(OLdensity, "[[", "x")),
y = unlist(lapply(OLdensity, "[[", "y")),
from_name = rep(names(OLdensity), each = length(OLdensity[[1]]$x))
)
OrangeLineDensityPlotS <- plot_ly(OLdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(yaxis=list(title="Density",range=c(0,0.4)), xaxis=list(title="SouthBound",range = c(0,600)))
#Travel Times for Blue Line with Plotly
BLdensity <- with(BlueLineTravelTimes %>% filter(direction == 1), tapply(travel_time_sec, INDEX = from_name, density))
BLdf <- data.frame(
x = unlist(lapply(BLdensity, "[[", "x")),
y = unlist(lapply(BLdensity, "[[", "y")),
from_name = rep(names(BLdensity), each = length(BLdensity[[1]]$x))
)
BlueLineDensityPlotN <- plot_ly(BLdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(title="Density Plot for Travel times for MBTA OL trains by departing station", yaxis=list(title="Density",range=c(0,0.4)), xaxis=list(title="NorthBound",range = c(0,600)))
BLdensity <- with(BlueLineTravelTimes %>% filter(direction == 0), tapply(travel_time_sec, INDEX = from_name, density))
BLdf <- data.frame(
x = unlist(lapply(BLdensity, "[[", "x")),
y = unlist(lapply(BLdensity, "[[", "y")),
from_name = rep(names(BLdensity), each = length(BLdensity[[1]]$x))
)
BlueLineDensityPlotS <- plot_ly(BLdf, x = ~x, y = ~y, color = ~from_name, fill = 'tozeroy') %>%
add_lines() %>%
layout(yaxis=list(title="Density",range=c(0,0.4)), xaxis=list(title="SouthBound",range = c(0,600)))
A Density Plot visualises the distribution of data over a continuous interval or time period.
Time spent by the train at a station. PS:I believe the data might be inconsistent
RedLineDwellTimes Data
head(RedLineDwellTimes)
## route_id direction arr_dt dep_dt dwell_time_sec stop_id
## 1 Red 0 1512124096 1512124647 551 70061
## 2 Red 0 1512124553 1512125223 670 70061
## 3 Red 0 1512125048 1512125551 503 70061
## 4 Red 0 1512125788 1512126056 268 70061
## 5 Red 0 1512126522 1512126780 258 70061
## 6 Red 0 1512126708 1512126973 265 70061
## stop_name hour_time
## 1 Alewife 05
## 2 Alewife 05
## 3 Alewife 05
## 4 Alewife 05
## 5 Alewife 06
## 6 Alewife 06
df<-RedLineDwellTimes[,c(5,7,8)]
df$dwell_time_sec<-as.double(df$dwell_time_sec)
df1<-aggregate(dwell_time_sec ~ ., df, mean)
df2<-aggregate(dwell_time_sec ~ hour_time, df1, mean)
df3<-GreenLineBDwellTimes[,c(5,7,8)]
df3$dwell_time_sec<-as.double(df3$dwell_time_sec)
df4<-aggregate(dwell_time_sec ~ ., df3, mean)
df5<-aggregate(dwell_time_sec ~ hour_time, df4, mean)
df6<-OrangeLineDwellTimes[,c(5,7,8)]
df6$dwell_time_sec<-as.double(df6$dwell_time_sec)
df7<-aggregate(dwell_time_sec ~ ., df6, mean)
df8<-aggregate(dwell_time_sec ~ hour_time, df7, mean)[-c(2),]
df2new1<-cbind(df2,rep("Red",nrow(df2)))
colnames(df2new1)<-c("hour_time","dwell_time_sec","route_name")
df5new1<-cbind(df5,rep("Green-B",nrow(df5)))
colnames(df5new1)<-c("hour_time","dwell_time_sec","route_name")
df8new1<-cbind(df8,rep("Orange",nrow(df8)))
colnames(df8new1)<-c("hour_time","dwell_time_sec","route_name")
dfF<-rbind(df2new1,df5new1,df8new1)
dfF$dwell_time_sec<-as.numeric(dfF$dwell_time_sec)
RGODwellPlot<-plot_ly(dfF, x = ~hour_time, y = ~dwell_time_sec, color = ~route_name, type="bar", name="Dwell") %>%
layout(title="Dwell Times of trains during hours in a day",yaxis=list(title="Dwell time"),xaxis=list(title=""))
Headways track the time between departures at a given station. When trains are running late, headways exceed their benchmark targets. The underlying causes of slow/bad service can probably be better picked apart from travel times and dwell times, and the eventual impact to riders is most cleanly seen in headways.
RedLineHeadway<-na.omit(RedLineHeadway)
head(RedLineHeadway)
## from_stop from_name from_lat from_lon to_stop to_name to_lat
## 1 70063 Davis 42.39674 -71.121815 70065 Porter 42.3884
## 2 70065 Porter 42.3884 -71.119149 70067 Harvard 42.373362
## 3 70063 Davis 42.39674 -71.121815 70065 Porter 42.3884
## 4 70067 Harvard 42.373362 -71.118956 70069 Central 42.365486
## 5 70065 Porter 42.3884 -71.119149 70067 Harvard 42.373362
## 6 70069 Central 42.365486 -71.103802 70071 Kendall/MIT 42.36249079
## to_lon prev_route_id direction current_dep_dt
## 1 -71.119149 Red 0 2017-12-01 05:28:58
## 2 -71.118956 Red 0 2017-12-01 05:30:59
## 3 -71.119149 Red 0 2017-12-01 05:33:45
## 4 -71.103802 Red 0 2017-12-01 05:34:49
## 5 -71.118956 Red 0 2017-12-01 05:35:49
## 6 -71.08617653 Red 0 2017-12-01 05:38:22
## previous_dep_dt headway_time_sec benchmark_headway_time_sec
## 1 2017-12-01 05:21:13 465 480
## 2 2017-12-01 05:23:22 457 405
## 3 2017-12-01 05:28:58 287 405
## 4 2017-12-01 05:26:10 519 420
## 5 2017-12-01 05:30:59 290 405
## 6 2017-12-01 05:29:58 504 405
## current_dep_d current_dep_t previous_dep_d previous_dep_t
## 1 2017-12-01 05:28:58 2017-12-01 05:21:13
## 2 2017-12-01 05:30:59 2017-12-01 05:23:22
## 3 2017-12-01 05:33:45 2017-12-01 05:28:58
## 4 2017-12-01 05:34:49 2017-12-01 05:26:10
## 5 2017-12-01 05:35:49 2017-12-01 05:30:59
## 6 2017-12-01 05:38:22 2017-12-01 05:29:58
RedLineHeadwayBoxPlot <- plot_ly(RedLineHeadway, y = ~(headway_time_sec), color = ~from_name, type = "box")%>%
layout(title="Headway Red Line", yaxis=list(title="Headway time",range=c(0,2000)))
###Plot Green Line B Headway
GreenLineBHeadway<-na.omit(GreenLineBHeadway)
GreenLineBHeadwayBoxPlot <- plot_ly(GreenLineBHeadway, y = ~(headway_time_sec), color = ~from_name, type = "box")%>%
layout(title="Headway Green-B line", yaxis=list(title="Headway time",range=c(0,2000)))
###Plot Green Line C Headway
GreenLineCHeadway<-na.omit(GreenLineCHeadway)
GreenLineCHeadwayBoxPlot <- plot_ly(GreenLineCHeadway, y = ~(headway_time_sec), color = ~from_name, type = "box")%>%
layout(title="Headway Green-C line", yaxis=list(title="Headway time",range=c(0,2000)))
###Plot Green Line D Headway
GreenLineDHeadway<-na.omit(GreenLineDHeadway)
GreenLineDHeadwayBoxPlot <- plot_ly(GreenLineDHeadway, y = ~(headway_time_sec), color = ~from_name, type = "box")%>%
layout(title="Headway Green-D line", yaxis=list(title="Headway time",range=c(0,2000)))
###Plot Green Line E Headway
GreenLineEHeadway<-na.omit(GreenLineEHeadway)
GreenLineEHeadwayBoxPlot <- plot_ly(GreenLineEHeadway, y = ~(headway_time_sec), color = ~from_name, type = "box")%>%
layout(title="Headway Green-E line", yaxis=list(title="Headway time",range=c(0,2000)))
###Plot Orange Line Headway
OrangeLineHeadway<-na.omit(OrangeLineHeadway)
OrangeLineHeadwayBoxPlot <- plot_ly(OrangeLineHeadway, y = ~(headway_time_sec), color = ~from_name, type = "box")%>%
layout(title="Headway Orange line", yaxis=list(title="Headway time",range=c(0,2000)))
###Plot Blue Line Headway
BlueLineHeadway<-na.omit(BlueLineHeadway)
BlueLineHeadwayBoxPlot <- plot_ly(BlueLineHeadway, y = ~(headway_time_sec), color = ~from_name, type = "box")%>%
layout(title="Headway Blue line", yaxis=list(title="Headway time",range=c(0,2000)))
RLHours<-strftime(as.POSIXct(as.numeric(RedLineHeadway$current_dep_dt), origin="1970-01-01"), format="%H")
RedLineHeatMap<-data.frame(RedLineHeadway$from_name, as.numeric(RLHours), as.numeric(RedLineHeadway$headway_time_sec))
colnames(RedLineHeatMap)<-c("from_name","hours","headway_time_sec")
RedLineHeatMap1<-subset(RedLineHeatMap, (RedLineHeatMap$hours != 1) & (RedLineHeatMap$hours != 4) & (RedLineHeatMap$hours != 5))
RedLineHeatMap1[RedLineHeatMap1$hours==0,]$hours <- 24
RLheatmapagg<-aggregate(headway_time_sec ~ ., RedLineHeatMap1,mean)
RLHlen<-(nrow(RLheatmapagg)/length(unique(RLheatmapagg$from_name)))
RLheatmapmatrix<-matrix(RLheatmapagg$headway_time_sec,ncol=RLHlen)
RLHeatMapPlot <- plot_ly(
x = unique(RLheatmapagg$hours), y = unique(RLheatmapagg$from_name), colorscale = "Greys",
z = RLheatmapmatrix, type = "heatmap"
)%>%
layout(title="Red Line Line heat map", margin = list(l=150), yaxis=list(autorange='reversed'))
#Green-B
GLBHours<-strftime(as.POSIXct(as.numeric(GreenLineBHeadway$current_dep_dt), origin="1970-01-01"), format="%H")
GreenLineBHeatMap<-data.frame(GreenLineBHeadway$from_name, as.numeric(GLBHours), as.numeric(GreenLineBHeadway$headway_time_sec))
colnames(GreenLineBHeatMap)<-c("from_name","hours","headway_time_sec")
GreenLineBHeatMap1<-subset(GreenLineBHeatMap, (GreenLineBHeatMap$hours != 1) & (GreenLineBHeatMap$hours != 4) & (GreenLineBHeatMap$hours != 5))
GreenLineBHeatMap1[GreenLineBHeatMap1$hours==0,]$hours <- 24
GLBheatmapagg<-aggregate(headway_time_sec ~ ., GreenLineBHeatMap1,mean)
GLBHlen<-(nrow(GLBheatmapagg)/length(unique(GLBheatmapagg$from_name)))
GLBheatmapmatrix<-matrix(GLBheatmapagg$headway_time_sec,ncol=GLBHlen)
GLBHeatMapPlot <- plot_ly(
x = unique(GLBheatmapagg$hours), y = unique(GLBheatmapagg$from_name), colorscale = "Greys",
z = GLBheatmapmatrix, type = "heatmap"
)%>%
layout(title="Green-B Line heat map", margin = list(l=150), yaxis=list(autorange='reversed'))
#Green-C
GLCHours<-strftime(as.POSIXct(as.numeric(GreenLineCHeadway$current_dep_dt), origin="1970-01-01"), format="%H")
GreenLineCHeatMap<-data.frame(GreenLineCHeadway$from_name, as.numeric(GLCHours), as.numeric(GreenLineCHeadway$headway_time_sec))
colnames(GreenLineCHeatMap)<-c("from_name","hours","headway_time_sec")
GreenLineCHeatMap1<-subset(GreenLineCHeatMap, (GreenLineCHeatMap$hours != 1) & (GreenLineCHeatMap$hours != 4) & (GreenLineCHeatMap$hours != 5))
GreenLineCHeatMap1[GreenLineCHeatMap1$hours==0,]$hours <- 24
GLCheatmapagg<-aggregate(headway_time_sec ~ ., GreenLineCHeatMap1,mean)
GLCHlen<-(nrow(GLCheatmapagg)/length(unique(GLCheatmapagg$from_name)))
GLCheatmapmatrix<-matrix(GLCheatmapagg$headway_time_sec,ncol=GLCHlen)
GLCHeatMapPlot <- plot_ly(
x = unique(GLCheatmapagg$hours), y = unique(GLCheatmapagg$from_name), colorscale = "Greys",
z = GLCheatmapmatrix, type = "heatmap"
)%>%
layout(title="Green-C Line heat map", margin = list(l=150), yaxis=list(autorange='reversed'))
#Green-D
GLDHours<-strftime(as.POSIXct(as.numeric(GreenLineDHeadway$current_dep_dt), origin="1970-01-01"), format="%H")
GreenLineDHeatMap<-data.frame(GreenLineDHeadway$from_name, as.numeric(GLDHours), as.numeric(GreenLineDHeadway$headway_time_sec))
colnames(GreenLineDHeatMap)<-c("from_name","hours","headway_time_sec")
GreenLineDHeatMap1<-subset(GreenLineDHeatMap, (GreenLineDHeatMap$hours != 1) & (GreenLineDHeatMap$hours != 4) & (GreenLineDHeatMap$hours != 5))
GreenLineDHeatMap1[GreenLineDHeatMap1$hours==0,]$hours <- 24
GLDheatmapagg<-aggregate(headway_time_sec ~ ., GreenLineDHeatMap1,mean)
GLDHlen<-(nrow(GLDheatmapagg)/length(unique(GLDheatmapagg$from_name)))
GLDheatmapmatrix<-matrix(GLDheatmapagg$headway_time_sec,ncol=GLDHlen)
GLDHeatMapPlot <- plot_ly(
x = unique(GLDheatmapagg$hours), y = unique(GLDheatmapagg$from_name), colorscale = "Greys",
z = GLDheatmapmatrix, type = "heatmap"
)%>%
layout(title="Green-D Line heat map", margin = list(l=150), yaxis=list(autorange='reversed'))
#Green-E
GLEHours<-strftime(as.POSIXct(as.numeric(GreenLineEHeadway$current_dep_dt), origin="1970-01-01"), format="%H")
GreenLineEHeatMap<-data.frame(GreenLineEHeadway$from_name, as.numeric(GLEHours), as.numeric(GreenLineEHeadway$headway_time_sec))
colnames(GreenLineEHeatMap)<-c("from_name","hours","headway_time_sec")
GreenLineEHeatMap1<-subset(GreenLineEHeatMap, (GreenLineEHeatMap$hours != 1) & (GreenLineEHeatMap$hours != 4) & (GreenLineEHeatMap$hours != 5))
GreenLineEHeatMap1[GreenLineEHeatMap1$hours==0,]$hours <- 24
GLEheatmapagg<-aggregate(headway_time_sec ~ ., GreenLineEHeatMap1,mean)
GLEHlen<-(nrow(GLEheatmapagg)/length(unique(GLEheatmapagg$from_name)))
GLEheatmapmatrix<-matrix(GLEheatmapagg$headway_time_sec,ncol=GLEHlen)
GLEHeatMapPlot <- plot_ly(
x = unique(GLEheatmapagg$hours), y = unique(GLEheatmapagg$from_name), colorscale = "Greys",
z = GLEheatmapmatrix, type = "heatmap"
)%>%
layout(title="Green-E Line heat map", margin = list(l=150), yaxis=list(autorange='reversed'))
#Orange-Line
OLHours<-strftime(as.POSIXct(as.numeric(OrangeLineHeadway$current_dep_dt), origin="1970-01-01"), format="%H")
OrangeLineHeatMap<-data.frame(OrangeLineHeadway$from_name, as.numeric(OLHours), as.numeric(OrangeLineHeadway$headway_time_sec))
colnames(OrangeLineHeatMap)<-c("from_name","hours","headway_time_sec")
OrangeLineHeatMap1<-subset(OrangeLineHeatMap, (OrangeLineHeatMap$hours != 1) & (OrangeLineHeatMap$hours != 4) & (OrangeLineHeatMap$hours != 5))
OrangeLineHeatMap1[OrangeLineHeatMap1$hours==0,]$hours <- 24
OLheatmapagg<-aggregate(headway_time_sec ~ ., OrangeLineHeatMap1,mean)
OLHlen<-(nrow(OLheatmapagg)/length(unique(OLheatmapagg$from_name)))
OLheatmapmatrix<-matrix(OLheatmapagg$headway_time_sec,ncol=OLHlen)
OLHeatMapPlot <- plot_ly(
x = unique(as.character(OLheatmapagg$hours)), y = unique(OLheatmapagg$from_name), colorscale = "Greys",
z = OLheatmapmatrix, type = "heatmap"
)%>%
layout(title="Orange Line heat map", margin = list(l=150), yaxis=list(autorange='reversed'))
#Blue-Line
BLHours<-strftime(as.POSIXct(as.numeric(BlueLineHeadway$current_dep_dt), origin="1970-01-01"), format="%H")
BlueLineHeatMap<-data.frame(BlueLineHeadway$from_name, as.numeric(BLHours), as.numeric(BlueLineHeadway$headway_time_sec))
colnames(BlueLineHeatMap)<-c("from_name","hours","headway_time_sec")
BlueLineHeatMap1<-subset(BlueLineHeatMap, (BlueLineHeatMap$hours != 1) & (BlueLineHeatMap$hours != 4) & (BlueLineHeatMap$hours != 5))
BlueLineHeatMap1[BlueLineHeatMap1$hours==0,]$hours <- 24
BLheatmapagg<-aggregate(headway_time_sec ~ ., BlueLineHeatMap1,mean)
BLHlen<-(nrow(BLheatmapagg)/length(unique(BLheatmapagg$from_name)))
BLheatmapmatrix<-matrix(BLheatmapagg$headway_time_sec,ncol=BLHlen)
BLHeatMapPlot <- plot_ly(
x = unique(as.character(BLheatmapagg$hours)), y = unique(BLheatmapagg$from_name), colorscale = "Greys",
z = BLheatmapmatrix, type = "heatmap"
)%>%
layout(title="Blue Line heat map", margin = list(l=150), yaxis=list(autorange='reversed'))
PS: Plotly maps doesn’t display in the viewer panel of RStudio and has to be exported as html which is why I shifted towards Leaflet.
StopsComplete<-data.frame(fromJSON(paste(TStopsURL3))$data)
MBTAStaticMap <- leaflet() %>%
setView(lng = -71.0589, lat = 42.3601, zoom = 12) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolylines(as.numeric(RedLineRoute1$stop_lon),as.numeric(RedLineRoute1$stop_lat),color="red") %>%
addCircleMarkers(as.numeric(RedLineRoute1$stop_lon),as.numeric(RedLineRoute1$stop_lat),radius=1,popup = RedLineRoute1$parent_station_name) %>%
addPolylines(as.numeric(GreenBLineRoute1$stop_lon),as.numeric(GreenBLineRoute1$stop_lat),color="green") %>%
addCircleMarkers(as.numeric(GreenBLineRoute1$stop_lon),as.numeric(GreenBLineRoute1$stop_lat),radius=1,popup = GreenBLineRoute1$parent_station_name) %>%
addPolylines(as.numeric(GreenCLineRoute1$stop_lon),as.numeric(GreenCLineRoute1$stop_lat),color="green") %>%
addCircleMarkers(as.numeric(GreenCLineRoute1$stop_lon),as.numeric(GreenCLineRoute1$stop_lat),radius=1,popup = GreenCLineRoute1$parent_station_name) %>%
addPolylines(as.numeric(GreenDLineRoute1$stop_lon),as.numeric(GreenDLineRoute1$stop_lat),color="green") %>%
addCircleMarkers(as.numeric(GreenDLineRoute1$stop_lon),as.numeric(GreenDLineRoute1$stop_lat),radius=1,popup = GreenDLineRoute1$parent_station_name) %>%
addPolylines(as.numeric(GreenELineRoute1$stop_lon),as.numeric(GreenELineRoute1$stop_lat),color="green") %>%
addCircleMarkers(as.numeric(GreenELineRoute1$stop_lon),as.numeric(GreenELineRoute1$stop_lat),radius=1,popup = GreenELineRoute1$parent_station_name) %>%
addPolylines(as.numeric(BlueLineRoute1$stop_lon),as.numeric(BlueLineRoute1$stop_lat),color="blue") %>%
addCircleMarkers(as.numeric(BlueLineRoute1$stop_lon),as.numeric(BlueLineRoute1$stop_lat),radius=1,popup = BlueLineRoute1$parent_station_name) %>%
addPolylines(as.numeric(OrangeLineRoute1$stop_lon),as.numeric(OrangeLineRoute1$stop_lat),color="orange") %>%
addCircleMarkers(as.numeric(OrangeLineRoute1$stop_lon),as.numeric(OrangeLineRoute1$stop_lat),radius=1,popup = OrangeLineRoute1$parent_station_name) %>%
addPolylines(as.numeric(MattapanLineRoute1$stop_lon),as.numeric(MattapanLineRoute1$stop_lat),color="red") %>%
addCircleMarkers(as.numeric(MattapanLineRoute1$stop_lon),as.numeric(MattapanLineRoute1$stop_lat),radius=1,popup = MattapanLineRoute1$parent_station_name)
VehiclesComplete<-data.frame(fromJSON(paste(TVehiclesURL3))$data)
ShapesComplete <- data.frame(Date=as.Date(character()),
File=character(),
User=character(),
stringsAsFactors=FALSE)
PredictionsComplete <- data.frame(Date=as.Date(character()),
File=character(),
User=character(),
stringsAsFactors=FALSE)
for(i in RoutesList) {
ShapesComplete<-rbind(ShapesComplete, data.frame(fromJSON(paste(TShapesURL3,i,sep=""), flatten = TRUE)))
PredictionsComplete<-rbind(PredictionsComplete, data.frame(fromJSON(paste(TPredictionsURL3,i,sep=""), flatten = TRUE)))
}
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated some of the plots.